import urllib
import urllib.parse
from urllib import request

import requests
from bs4 import BeautifulSoup

# Browser-like request headers so Baidu serves a normal HTML page instead of
# blocking the script as a bot.
# NOTE(review): `headers` is not referenced by the code visible in this file
# (only `myhd` is used below) — confirm it is needed before removing.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, compress',
    'Accept-Language': 'en-us;q=0.5,en;q=0.3',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0'
} # Spoofed request headers masquerading as a desktop Firefox browser

# User-Agent actually used by the requests below (Edge on Windows 10).
myhd = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'}

def getfromBaidu(word, pages=range(2, 3)):
    """Run a Baidu web search for *word* and print the results of each page.

    Builds the search URL (query string URL-encoded via urllib.parse.quote)
    and delegates the fetching/printing of each result page to geturl().

    Args:
        word: the search term; may contain non-ASCII characters.
        pages: iterable of 1-based result-page numbers to fetch.
               Defaults to range(2, 3) — i.e. page 2 only — matching the
               original hard-coded behavior.
    """
    # The trailing rsv_*/rqlang parameters are session tokens captured from a
    # real browser search; Baidu accepts them as-is for repeated queries.
    url = ('http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd='
           + urllib.parse.quote(word)
           + '&rsv_pq=fda991b2000a54c6&rsv_t=20e03%2FFVyMRQeKZTIQ%2BmHcM5VzT9S2xYYAchhHLZOpcFt9S8mndA6IsrkBw&rqlang=cn&rsv_dl=tb&rsv_enter=0&rsv_sug=1')
    for page in pages:
        geturl(url, page)


def geturl(url, k):
    """Fetch result page *k* for a Baidu search URL and print each hit.

    For every <h3> result entry, prints the result title, the Baidu redirect
    link, and — when the redirect can be resolved — the real destination URL
    taken from the Location response header.

    Args:
        url: the full Baidu search URL (already contains the wd= query).
        k: 1-based page number; page 1 needs no &pn= offset, page k uses
           &pn=(k-1)*10 (Baidu shows 10 results per page).
    """
    page_path = "" if k == 1 else "&pn=" + str((k - 1) * 10)
    path = url + page_path
    print(path)
    response = requests.get(url=path, headers=myhd, allow_redirects=False)
    soup = BeautifulSoup(response.text, 'lxml')

    for h3 in soup.find_all('h3'):
        # Look up the anchor once; some <h3> tags are not result entries.
        link = h3.find('a')
        if link is None:
            continue
        title = link.get_text()
        href = link.get('href')
        print(title)
        print(href)
        if not href:
            continue  # anchor without an href — nothing to resolve
        try:
            # allow_redirects=False so Baidu's 302 exposes the real target
            # in the Location header instead of being followed.
            redirect = requests.get(url=href, headers=myhd, allow_redirects=False)
        except requests.RequestException as e:
            # Narrow handler: only network/HTTP errors are expected here;
            # programming errors should not be silently swallowed.
            print(str(e))
            continue
        # .get() avoids a KeyError when the response is not a redirect.
        real_url = redirect.headers.get('Location', '')  # real page address
        if real_url.startswith('http'):
            print(real_url)


if __name__ == "__main__":
    # Run a Baidu search for each query term in turn.
    for keyword in ['百瑞赢']:
        getfromBaidu(keyword)